#!/bin/sh -efu
#
# Copyright (C) 2006-2020  Dmitry V. Levin <ldv@altlinux.org>
# Copyright (C) 2006-2018  Alexey Gladkov <legion@altlinux.org>
#
# Incrementally import source packages to git repository.
#
# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
#

. gear-utils-sh-functions

# Print the program name and version followed by the authorship and
# licensing notice to stdout, then terminate the script.
# Reads the PROG and PROG_VERSION variables.
print_version()
{
	printf '%s\n' \
		"$PROG version $PROG_VERSION" \
		'Written by Dmitry V. Levin <ldv@altlinux.org>' \
		'' \
		'Copyright (C) 2006-2020  Dmitry V. Levin <ldv@altlinux.org>' \
		'Copyright (C) 2006-2018  Alexey Gladkov <legion@altlinux.org>' \
		'This is free software; see the source for copying conditions.  There is NO' \
		'warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.'
	exit
}

# Print the usage summary and the option reference to stdout, then
# terminate the script.  Reads the PROG variable.
show_help()
{
	printf '%s\n' \
		"$PROG - incrementally import source packages to git repository." \
		'' \
		"Usage: $PROG [options] <source-package>..." \
		"or:    $PROG [options] --stdin" \
		'' \
		'Options:' \
		'  --branch=BRANCH           branch name where to import (default is "srpms");' \
		'  --import-only             do not perform a checkout after importing;' \
		'  --no-unpack               do not unpack source archives;' \
		'  --rules=FILENAME          gear rules filename (default is ".gear/rules");' \
		'  --spec-pattern=PATTERN    specfile pattern list (default is "*.spec");' \
		'  --stdin                   read source package file names from stdin;' \
		'  -q, --quiet               try to be more quiet;' \
		'  -v, --verbose             print a message for each action;' \
		'  -V, --version             print program version and exit;' \
		'  -h, --help                show this text and exit.' \
		'' \
		'Report bugs to http://bugzilla.altlinux.org/' \
		''
	exit
}

# Temporary directory of this run; stays empty until it has been created,
# which turns cleanup_handler into a no-op.
tmpdir=

# Undo the temporary changes made to the repository state:
#  - return to $saved_cwd;
#  - unless $index_already_recovered is set, put the backed up index file
#    back in place, or remove the index when there is no usable backup;
#  - remove $tmpdir;
#  - point HEAD back at $branch_orig if it differs from the import branch.
cleanup_handler()
{
	if [ -z "$tmpdir" ]; then
		return 0
	fi

	cd "$saved_cwd"

	# Recover index file.
	if [ -z "$index_already_recovered" ]; then
		if ! { [ -f "$tmpdir/index" ] &&
		       cp -p "$tmpdir/index" "$index_orig"; }; then
			# No backup, or restoring it failed: drop the index,
			# ignoring a failure to do so.
			rm -f "$index_orig" ||:
		fi
	fi

	rm -rf -- "$tmpdir"

	# Recover HEAD.
	if [ "$branch_orig" != "refs/heads/$branch_import" ]; then
		git symbolic-ref HEAD "$branch_orig"
	fi
}

# Heuristically shorten tar directory name.
#
# Strips trailing "-<letters><digits>[<suffix>]" groups (version-like
# tails) from $1 and prints the result without a trailing newline.
# The unmodified name is printed instead when nothing was stripped, when
# stripping would leave an empty string, or when a file with the
# shortened name already exists in the current directory.
shorten_dir_name()
{
	local full candidate
	full="$1"
	shift

	candidate="$(printf %s "$full" |
		LC_COLLATE=C sed 's/\(-[[:alpha:]]*[0-9]\+\([.[:alpha:]]\+[.[:alnum:]]*\)\?\)\+$//')"

	# Reject unusable candidates and fall back to the full name.
	if [ -z "$candidate" ] || [ "$candidate" = "$full" ] || [ -e "$candidate" ]; then
		candidate="$full"
	fi

	printf %s "$candidate"
}

# Compact the rules file $1 in place and stage it in the git index.
#
# Rules of the form "<method>: <file>.<suffix>" are grouped by suffix.
# When every rule for a suffix uses one and the same method out of
# copy, gzip, bzip2, xz or zstd, they are replaced by a single
# "<method>: *.<suffix>" rule; otherwise the original rules for that
# suffix are kept.  Rules that carry extra options, or whose single
# argument contains no dot, are appended unchanged after the grouped ones.
#
# Does nothing when the rules file is missing or empty.
# The temporary file is created in the current directory.
optimize_rules()
{
	# All working variables are local so that nothing (in particular
	# "tmp") leaks into the global namespace shared with sourced helpers.
	local rules tmp suffix quoted method
	rules="$1" && shift
	[ -s "$rules" ] || return 0

	tmp="$(mktemp rules.XXXXXX)"
	# First pass: one iteration per distinct file name suffix.
	sed -n 's/^[^:]\+: [^ ]*\.\([^ .]\+\).*/\1/p' "$rules" |
		LC_COLLATE=C sort -u |
		while read -r suffix; do
			quote_sed_regexp_variable quoted "$suffix"
			# Distinct methods used with this suffix; more than one
			# method yields a multi-line value that matches no
			# pattern but the default one below.
			method="$(sed -n 's/^\([^:]\+\): [^ ]*\.'"$quoted"'$/\1/p' "$rules" |LC_COLLATE=C sort -u)"
			case "$method" in
				copy|gzip|bzip2|xz|zstd)
					printf '%s: *.%s\n' "$method" "$suffix"
					;;
				*)
					sed -n 's/^[^:]\+: [^ ]*\.'"$quoted"'$/&/p' "$rules"
					;;
			esac
		done >"$tmp"
	# Second pass: rules with options or with a dotless single argument.
	sed -n 's/^[^:]\+: \([^ ]\+ .*\|[^ .]\+$\)/&/p' "$rules" >>"$tmp"
	mv "$tmp" "$rules"
	git update-index --add ${verbose:+--verbose} -- "$rules"
}

# Append a "<method>: <files>" rule to $gear_rules and stage the files.
#
# Arguments:
#   $1   - rule method name;
#   $2.. - file names; they are written on one rule line, joined the way
#          "$*" joins positional parameters, and each is added to the index.
write_copy_rule()
{
	local rule_method
	rule_method="$1"
	shift

	printf '%s: %s\n' "$rule_method" "$*" >>"$gear_rules"
	git update-index --add ${verbose:+--verbose} -- "$@"
}

# Print names of top-level non-directory members of tar archive $1.
#
# Works on the verbose listing: entries whose mode string starts with "d"
# and entries whose name contains a slash are dropped, and the five
# whitespace-separated columns that follow the first character are
# stripped, leaving the member name only.
list_tar_toplevel_nondir()
{
	local archive name_only
	archive="$1"
	shift

	name_only='s/^[^d]\([^[:space:]]\+[[:space:]]\+\)\{5\}\([^/]\+\)$/\2/p'
	tar -tvf "$archive" | sed -n "$name_only"
}

# Print names of top-level non-directory members of zip archive $1.
#
# Works on the zipinfo listing produced with the header and trailer
# lines turned off: entries whose mode string starts with "d" and entries
# whose name contains a slash are dropped, and the eight
# whitespace-separated columns that follow the first character are
# stripped, leaving the member name only.
list_zip_toplevel_nondir()
{
	local archive name_only
	archive="$1"
	shift

	name_only='s/^[^d]\([^[:space:]]\+[[:space:]]\+\)\{8\}\([^/]\+\)$/\2/p'
	zipinfo --h-t -- "$archive" | sed -n "$name_only"
}

# Version string taken from the specfile of the package being imported;
# assigned by import() and read by import_tree() when writing rules.
spec_version=
# Import an archive as an unpacked directory tree, append the matching
# pack rule to $gear_rules and stage the unpacked files in the git index.
#
# Arguments:
#   $1 - archive kind: "tar" or "zip" (anything else is fatal);
#   $2 - method handed over to import_file() on fallback; for tar archives
#        it also selects the pack rule (bzip2, gzip, xz, zstd, else plain tar);
#   $3 - archive file name in the current directory.
#
# The archive is stored as a plain file via import_file() instead when
# unpacking is disabled ($no_unpack), when it cannot be listed or
# unpacked, when its listing is empty, or when the listing contains
# ".." path components.
import_tree()
{
	local method_tree="$1"; shift
	local method_copy="$1"; shift
	local f="$1"; shift

	if [ -n "$no_unpack" ]; then
		# If archive unpacking is disabled, then store it as a file.
		import_file 1 "$method_copy" "$f"
		return
	fi

	# Define archive list command and related parameters.
	# arch_basename is the file name without its archive suffix;
	# rule_suffix is set whenever the actual suffix differs from the one
	# implied by method_pack, so that it gets recorded in the rule.
	local source_ls_cmd source_ls_toplevel_nondir_cmd
	local arch_basename method_pack rule_suffix=
	case "$method_tree" in
		tar)
			source_ls_cmd='tar -tf'
			source_ls_toplevel_nondir_cmd=list_tar_toplevel_nondir
			case "$method_copy" in
				bzip2) method_pack=tar.bz2 ;;
				gzip) method_pack=tar.gz ;;
				xz) method_pack=tar.xz ;;
				zstd) method_pack=tar.zst ;;
				*) method_pack=tar ;;
			esac
			case "$f" in
				*.tar)
					arch_basename="${f%.tar}"
					[ "$method_pack" = tar ] ||
						rule_suffix=' suffix=.tar'
					;;
				*.tar.bz2)
					arch_basename="${f%.tar.bz2}"
					[ "$method_pack" = tar.bz2 ] ||
						rule_suffix=' suffix=.tar.bz2'
					;;
				*.tar.gz)
					arch_basename="${f%.tar.gz}"
					[ "$method_pack" = tar.gz ] ||
						rule_suffix=' suffix=.tar.gz'
					;;
				*.tar.xz)
					arch_basename="${f%.tar.xz}"
					[ "$method_pack" = tar.xz ] ||
						rule_suffix=' suffix=.tar.xz'
					;;
				*.tar.zst)
					arch_basename="${f%.tar.zst}"
					[ "$method_pack" = tar.zst ] ||
						rule_suffix=' suffix=.tar.zst'
					;;
				*.tbz)
					arch_basename="${f%.tbz}"
					rule_suffix=' suffix=.tbz'
					;;
				*.tbz2)
					arch_basename="${f%.tbz2}"
					rule_suffix=' suffix=.tbz2'
					;;
				*.tgz)
					arch_basename="${f%.tgz}"
					rule_suffix=' suffix=.tgz'
					;;
				*.txz)
					arch_basename="${f%.txz}"
					rule_suffix=' suffix=.txz'
					;;
				*)
					# No recognized suffix: the whole file name
					# is the base name and the suffix is empty.
					arch_basename="$f"
					rule_suffix=' suffix='
					;;
			esac
			;;
		zip)
			source_ls_cmd='zipinfo -1 --'
			source_ls_toplevel_nondir_cmd=list_zip_toplevel_nondir
			method_pack=zip
			case "$f" in
				*.zip)	arch_basename="${f%.zip}"
					;;
				*)	arch_basename="$f"
					rule_suffix=' suffix='
					;;
			esac
			;;
		*) fatal "import_tree: $method_tree: unrecognized method"
			;;
	esac

	# Fetch archive list.
	# $source_ls_cmd is deliberately unquoted: it holds a command with
	# its options and has to be split into words.
	local tree_list
	if ! tree_list="$($source_ls_cmd "$f")" ||
	   [ -z "$tree_list" ]; then
		# If archive is empty or corrupted, then store it as a file.
		import_file 1 "$method_copy" "$f"
		return
	fi

	# Avoid unpacking archives with paths containing /../
	# Otherwise dir_name becomes the sorted list of distinct first path
	# components found in the listing, one per line.
	local dir_name=
	printf %s "$tree_list" |LC_ALL=C grep -E -qs '^\.\.(/|$)|/\.\.(/|$)' ||
		dir_name="$(printf %s "$tree_list" |
			    tr -s / |			# squeeze slashes
			    sed 's|^/|.&|' |		# prefix leading slash with dot
			    sed 's|^\(\./\)\+||g' |	# strip leading "./"
			    sed -n 's|^\([^/]\+\)\(/.*\)\?$|\1|p' |
			    LC_COLLATE=C sort -u)"
	if [ -z "$dir_name" ]; then
		# If archive dirlist is empty, then store archive as a file.
		import_file 1 "$method_copy" "$f"
		return
	fi

	# Decide whether the archive has a single, plainly named top-level
	# directory.  The first test detects any whitespace in dir_name
	# (including the newlines separating several entries), the second one
	# any character outside of "-[:alnum:]_.,+".
	local base='' subdir unpack_into_subdir=''
	if [ "$dir_name" != "$(printf %s "$dir_name" |LC_COLLATE=C tr -d '[:space:]')" ] ||
	   [ -n "$(printf %s "$dir_name" |LC_COLLATE=C tr -d -- '-[:alnum:]_.,+')" ] ||
	   [ -n "$($source_ls_toplevel_nondir_cmd "$f")" ]; then
		# More than one subdir, funny dir name, or non-dir toplevel files.
		unpack_into_subdir=1
		dir_name="$arch_basename"
	else
		base="$dir_name"
	fi

	# Unpack into a fresh temporary directory created in the current
	# directory; any failure below marks the archive as broken.
	local subdir broken=
	subdir="$(mktemp -d subdir.XXXXXX)"

	case "$method_tree" in
		tar) tar -x -C "$subdir" -f "$f" ;;
		zip) unzip -q "$f" -d "$subdir" ;;
	esac || broken=1
	chmod -Rf u+rwX,go-w "$subdir" || broken=1

	if [ -z "$broken" ]; then
		# Add .gitattributes export-ignore file to each empty directory.
		# The file is written once into $tmpdir and hard-linked into
		# every empty directory (presumably so that those directories
		# can be tracked by git -- TODO confirm).
		echo '.gitattributes export-ignore' >"$tmpdir/export-ignore" &&
		find "$subdir" -type d -empty -exec ln -- "$tmpdir/export-ignore" '{}/.gitattributes' ';' ||
			broken=1
	fi

	if [ -n "$broken" ]; then
		# Archive is broken, store it as a file.
		rm -rf "$subdir"
		import_file 1 "$method_copy" "$f"
		return
	fi
	# Unpacked successfully: the archive file itself is not needed anymore.
	rm "$f"

	# Apply heuristics to shorten directory name.
	local short_name
	short_name="$(shorten_dir_name "$dir_name")"

	# Avoid file names clashes.
	if [ -e "$short_name" ] && [ "$dir_name" != "$arch_basename" ]; then
		# Try to shorten archive name.
		short_name="$(shorten_dir_name "$arch_basename")"
	fi

	if [ -e "$short_name" ]; then
		# Shorten heuristics failed, try with full directory name.
		short_name="$dir_name"
		if [ -e "$short_name" ] && [ "$dir_name" != "$arch_basename" ]; then
			# Try with full archive name.
			short_name="$arch_basename"
		fi
	fi

	if [ -e "$short_name" ]; then
		# Full name also clashes, so fall back to file name.
		# (The archive file of that name has been removed above.)
		short_name="$f"
	fi

	# Move temporary directory to final place.
	if [ -n "$unpack_into_subdir" ]; then
		mv "$subdir" "$short_name"
	else
		mv "$subdir/$dir_name" "$short_name"
		rmdir "$subdir"
	fi
	dir_name="$short_name"

	# Compose the pack rule.  name= and base= are written only when they
	# differ from what "$dir_name-$spec_version" and $arch_basename imply.
	local quoted rule_base='' rule_name=''
	quote_sed_regexp_variable quoted "$spec_version"

	[ "$dir_name-$spec_version" = "$arch_basename" ] ||
		rule_name=" name=$arch_basename"

	[ "$arch_basename" = "$base" ] ||
		rule_base=" base=$base"

	# Every occurrence of the spec version in the rule is replaced by
	# the @version@ keyword; if $quoted is empty, the substitution
	# degenerates into replacing @version@ with itself.
	printf '%s: %s%s%s%s\n' "$method_pack" "$dir_name" "$rule_name" "$rule_base" "$rule_suffix" |
		sed "s/${quoted:-@version@}/@version@/g" >>"$gear_rules"

	# Remove nested .git directories from the unpacked tree, reporting
	# each of them on stderr.
	find "./$dir_name" -type d -name .git \
		-printf "$PROG: Ignoring path: %p/\\n" \
		-execdir rm -rf -- '{}' ';' \
		-prune >&2

	# Stage every untracked or modified file of the unpacked tree.
	git ls-files -z --others --modified -- "$dir_name" |
		git update-index --add ${verbose:+--verbose} -z --stdin
}

# Import one file (or, for the copy method, any number of files) and
# record the matching rule through write_copy_rule().
#
# Arguments:
#   $1   - non-empty to skip looking inside compressed files;
#   $2   - method: copy, bzip2, gzip, xz or zstd;
#   $3.. - file name(s); only the first one is used unless $2 is "copy".
#
# A compressed tar archive is handed over to import_tree().  A file whose
# suffix matches its compression method is uncompressed and recorded with
# that method; everything else is recorded as a plain copy.
import_file()
{
	# NB: "no_unpack" deliberately shadows the global of the same name,
	# import_tree() reads it while being called from here.
	local no_unpack method
	no_unpack="$1"; shift
	method="$1"; shift

	if [ "$method" = copy ]; then
		write_copy_rule copy "$@"
		return
	fi

	local f
	f="$1"; shift

	# If archive unpacking is enabled, then
	# look for file type inside compressed file.
	local inner_type=
	if [ -z "$no_unpack" ]; then
		inner_type="$(file -bz -- "$f")" ||:
	fi

	case "$inner_type" in
		*tar\ archive*)
			import_tree tar "$method" "$f"
			return
			;;
	esac

	# Pick the uncompress command, the resulting file name and the rule
	# method for files whose suffix agrees with the compression method.
	local unpack_cmd='' unpacked='' rule_method=''
	case "$f,$method" in
		*.bz2,bzip2)
			unpack_cmd=bunzip2 unpacked="${f%.bz2}" rule_method=bzip2
			;;
		*.gz,gzip)
			unpack_cmd=gunzip unpacked="${f%.gz}" rule_method=gzip
			;;
		*.xz,xz)
			unpack_cmd=unxz unpacked="${f%.xz}" rule_method=xz
			;;
		*.zst,zstd)
			unpack_cmd=unzstd unpacked="${f%.zst}" rule_method=zstd
			;;
	esac

	# gear copy rules do not support file name transformation, so a file
	# with an unexpected suffix is copied as is; the same happens if the
	# target file exists or uncompression fails.
	if [ -n "$unpack_cmd" ] && [ ! -e "$unpacked" ] && "$unpack_cmd" "$f"; then
		write_copy_rule "$rule_method" "$unpacked"
	else
		write_copy_rule copy "$f"
	fi
}

# Tell whether file $1 looks like a source rpm package.
#
# The first 8 bytes of the file are inspected: bytes 0-3 must be
# ed ab ee db, bytes 4-5 are not checked, and bytes 6-7 decide:
#   00 01 - accepted right away;
#   00 00 - accepted only if rpmquery succeeds and reports no
#           sourcerpm tag for the package.
# Returns 0 for a source package, non-zero otherwise (including the
# case when the file cannot be read).
is_srpm()
{
	local pkg lead srcrpm_tag

	pkg="$1"
	shift
	lead="$(od -A n -N 8 -t x1 -- "$pkg")" || return

	case "$lead" in
		' ed ab ee db '??' '??' 00 01')
			return 0
			;;
		' ed ab ee db '??' '??' 00 00')
			if srcrpm_tag="$(rpmquery --qf '%|sourcerpm?{1}|' -p "$pkg")" &&
			   [ -z "$srcrpm_tag" ]; then
				return 0
			fi
			;;
	esac
	return 1
}

# Cached commit command: chosen by the first import() call depending on
# whether the "git commit" help output mentions the [--fast] option.
git_commit_fast=
# Import one source package as a single git commit and tag it.
#
# Arguments:
#   $1 - source package file name, resolved relative to $saved_cwd.
#
# The package is validated and queried, unpacked into a fresh $workdir,
# every regular file from its file list is imported according to its
# detected type, the rules file is optimized, and the result is committed
# with the package build time as both author and committer date.
# Terminates via fatal() if the package is not a valid source rpm or if
# the commit fails.
import()
{
	local srpm
	srpm="$1"; shift

	cd "$saved_cwd"
	verbose "Processing $srpm"
	srpm="$(readlink -ev -- "$srpm")"

	# rpmquery format producing shell assignments; all values are
	# emitted with the :shescape modifier because the response is
	# passed to eval below.
	local query='
buildtime=%{buildtime:shescape};
changelogname=%|CHANGELOGNAME?{%{CHANGELOGNAME:shescape}}|;
changelogtext=%|CHANGELOGTEXT?{%{CHANGELOGTEXT:shescape}}|;
name=%{NAME:shescape};
version=%{VERSION:shescape};
release=%{RELEASE:shescape};
fileflags_filenames=["%{FILEFLAGS:fflags} "%{FILENAMES:shescape}"\n"];
'

	local buildtime changelogname changelogtext
	local name version release fileflags_filenames
	local response filelist

	# Any failing step of this chain makes the package invalid.
	is_srpm "$srpm" &&
	response="$(rpmquery -p --qf "$query" -- "$srpm")" &&
	eval "$response" && unset query response &&
	[ -n "$name" ] && [ -n "$version" ] && [ -n "$release" ] &&
	filelist="$(rpm2cpio "$srpm" |cpio --list --quiet )" ||
		fatal "$srpm: Not a valid source rpm package"

	local verrel
	verrel="$version-$release"

	# File name whose flags field contains the letter "s" (presumably the
	# specfile flag -- TODO confirm), with leading directories stripped.
	local filespec
	filespec="$(printf %s "$fileflags_filenames" |
		sed -n 's/^[^s ]*s[^s ]* \(.\+\)/\1/p')"
	filespec="${filespec##*/}"

	# Split the changelog name of the form "Name <user@host>" (or
	# "<user at host>") into author name and email, trimming surrounding
	# whitespace; "alt-linux" in the email is rewritten to "altlinux".
	local author_name author_email
	author_name="$(printf '%s\n' "$changelogname" |
		sed -n 's/^\([^<]\+\)<[^@]\+\(@\| at \)[^@]\+>.*/\1/p' |
		sed -e 's/^[[:space:]]\+//' -e 's/[[:space:]]\+$//')"
	author_email="$(printf '%s\n' "$changelogname" |
		sed -n 's/^[^<]*<\([^@]\+\(@\| at \)[^@]\+\)>.*/\1/p' |
		sed -e 's/^[[:space:]]\+//' -e 's/[[:space:]]\+$//' |
		sed s/alt-linux/altlinux/g)"

	# Export the author identity only for the parts that were found;
	# the variables are local, so they do not outlive this call.
	local GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL
	if [ -n "$author_name" ]; then
		GIT_AUTHOR_NAME="$author_name"
		export GIT_AUTHOR_NAME
	fi
	if [ -n "$author_email" ]; then
		GIT_AUTHOR_EMAIL="$author_email"
		export GIT_AUTHOR_EMAIL
	fi

	# Use own directory for checkout.
	rm -rf "$workdir"
	mkdir "$workdir"
	cd "$workdir"
	local cpio_quiet=''
	[ -n "$verbose" ] || cpio_quiet=--quiet
	rpm2cpio "$srpm" |cpio --extract $cpio_quiet
	verbose "Unpacked ${srpm##*/}"

	# Create the parent directory of the rules file if its name
	# contains a slash (the expansion is empty exactly in that case).
	[ -n "${gear_rules##*/*}" ] ||
		mkdir -p -- "${gear_rules%/*}"

	# Find specfile.
	local pfx="${srpm##*/}: "
	# cat is used as a readability check for the fileflags candidate.
	cat "$filespec" > /dev/null || {
		verbose "${pfx}Ignored specfile from fileflags: $filespec"
		filespec=
	}
	# ${filelist} is deliberately unquoted: one argument per file name.
	local spec
	spec="$(find_specfile_in_cwd "$pfx" ${filelist} ||:)"
	if [ -n "$filespec" ] && [ "$filespec" != "$spec" ]; then
		# The fileflags candidate wins over the one found by name.
		spec="$filespec"
		printf 'spec: %s\n' "$spec" >>"$gear_rules"
		verbose "${pfx}Set specfile from fileflags: $spec"
	elif [ -n "$spec" ]; then
		# A "spec:" rule is written only if the name neither begins
		# nor ends with a match of $def_spec_pattern
		# (NOTE(review): def_spec_pattern is defined outside this file).
		if [ -n "${spec##$def_spec_pattern}" ] && [ -n "${spec%%$def_spec_pattern}" ]; then
			printf 'spec: %s\n' "$spec" >>"$gear_rules"
			if [ "$filespec" = "$spec" ]; then
				verbose "${pfx}Set specfile: $spec"
			else
				verbose "${pfx}Set specfile from filelist: $spec"
			fi
		else
			if [ "$filespec" = "$spec" ]; then
				verbose "${pfx}Using specfile: $spec"
			else
				verbose "${pfx}Using specfile from filelist: $spec"
			fi
		fi
	fi

	# Take the value of the first "Version:" line of the specfile
	# (matched case-insensitively), without trailing whitespace.
	spec_version=
	if [ -n "$spec" ]; then
		spec_version="$(sed '/^version:[[:space:]]*/I!d;s///;q' "$spec" |
			sed 's/[[:space:]]\+$//')"
	else
		verbose "${pfx}Specfile not found"
	fi

	# Drop the current index file before staging the package contents
	# (a backup copy is kept in $tmpdir by the main script).
	rm -f "$index_orig"
	local f
	for f in ${filelist}; do
		if [ -L "$f" ] || [ ! -f "$f" ]; then
			message "$f: Non-regular file ignored."
			continue
		fi
		if [ "$f" = "$spec" ]; then
			# The specfile is staged as is, without any rule.
			git update-index --add ${verbose:+--verbose} -- "$f"
			continue
		fi
		if [ "$f" = "$gear_rules" ]; then
			message "$f: rules file $gear_rules ignored."
			continue
		fi
		# Dispatch on the file type reported by file(1).
		local file_type
		file_type="$(file -b -- "$f")"
		case "$file_type" in
			*tar\ archive*)
				import_tree tar copy "$f"
				;;
			*Zip\ archive*)
				import_tree zip copy "$f"
				;;
			bzip2\ compressed*)
				import_file "$no_unpack" bzip2 "$f"
				;;
			gzip\ compressed*)
				import_file "$no_unpack" gzip "$f"
				;;
			xz\ compressed*|XZ\ compressed*)
				import_file "$no_unpack" xz "$f"
				;;
			zstd\ compressed*|Zstandard\ compressed*)
				import_file "$no_unpack" zstd "$f"
				;;
			*)
				import_file "$no_unpack" copy "$f"
				;;
		esac
	done

	optimize_rules "$gear_rules"

	# Commit message: "<version>-<release>", an empty line, and the
	# changelog text.
	local message
	message="$(printf '%s\n\n%s\n' "$verrel" "$changelogtext")"

	# Probe once whether "git commit" advertises the --fast option.
	if [ -z "$git_commit_fast" ]; then
		git commit --h 2>&1 |grep -F -qs '[--fast]' &&
			git_commit_fast='git commit --fast' ||
			git_commit_fast='git commit'
	fi

	# Both dates are set to the package build time with a +0000 offset.
	# $git_commit_fast is deliberately unquoted: it holds a command line.
	local gts
	gts="$buildtime +0000"
	GIT_AUTHOR_DATE="$gts" GIT_COMMITTER_DATE="$gts" \
		$git_commit_fast --no-verify ${quiet:+-q} -a -m "$message" ||
			fatal "Failed to commit $srpm"
	verbose "Committed $name $verrel"
	# Tagging is best-effort: a failure to create the tag is ignored.
	GIT_AUTHOR_DATE="$gts" GIT_COMMITTER_DATE="$gts" \
		git tag -a -m "$name $verrel" "$verrel" ||:
	[ -n "${quiet-}" ] || message "Imported $srpm"
}

# Parse command line options.  getopt normalizes the arguments and always
# terminates the option list with "--"; the remaining positional
# parameters are the source packages to import.
TEMP="$(getopt -n $PROG -o h,q,v,V -l branch:,import-only,no-unpack,no-untar,rules:,spec-pattern:,stdin,help,quiet,verbose,version -- "$@")" ||
	show_usage
eval set -- "$TEMP"

# Option defaults.
branch_import=srpms
import_only=
no_unpack=
read_from_stdin=
gear_rules='.gear/rules'

# Consume options up to the "--" terminator.
while [ "$1" != -- ]; do
	case "$1" in
		--branch)
			shift
			branch_import="$1"
			;;
		--import-only)
			import_only=1
			;;
		--no-unpack|--no-untar)
			no_unpack=1
			;;
		--rules)
			shift
			gear_rules="$1"
			;;
		--spec-pattern)
			shift
			spec_pattern="$1"
			;;
		--stdin)
			read_from_stdin=1
			;;
		-h|--help)
			show_help
			;;
		-q|--quiet)
			quiet=-q
			verbose=
			;;
		-v|--verbose)
			verbose=-v
			quiet=
			;;
		-V|--version)
			print_version
			;;
		*)
			fatal "Unrecognized option: $1"
			;;
	esac
	shift
done
# Drop the "--" terminator itself.
shift

# Validate the number of positional arguments: none are accepted together
# with --stdin, at least one is required otherwise.
if [ -n "$read_from_stdin" ]; then
	# No arguments, please.
	if [ "$#" -ne 0 ]; then
		show_usage 'Too many arguments.'
	fi
elif [ "$#" -lt 1 ]; then
	# At least one argument, please.
	show_usage 'Not enough arguments.'
fi

# Locate the repository and export its canonical path, so that git
# commands below keep working after changing directories.
GIT_DIR="$(git rev-parse --git-dir)"
GIT_DIR="$(readlink -ev -- "$GIT_DIR")"
export GIT_DIR

# Remember what HEAD refers to now; cleanup_handler points HEAD back at
# $branch_orig.  head_orig stays empty if HEAD does not resolve to a
# commit (rev-parse failure is ignored).
branch_orig="$(git symbolic-ref HEAD)"
head_orig="$(git rev-parse --verify HEAD 2>/dev/null ||:)"

# Work with the index file inside $GIT_DIR, whatever the environment says.
unset GIT_INDEX_FILE ||:
index_orig="$GIT_DIR/index"
# Set to 1 after "git checkout -f" has been run, which tells
# cleanup_handler not to restore the index backup.
index_already_recovered=

# Change to toplevel directory
chdir_to_toplevel

# Save current work directory.
saved_cwd="$(/bin/pwd)"

# The handler is registered before $tmpdir is assigned; it does nothing
# as long as $tmpdir is empty.
install_cleanup_handler cleanup_handler
tmpdir="$(mktemp -dt "$PROG.XXXXXXXX")"
workdir="$tmpdir/work"

# Backup index file.
[ ! -f "$index_orig" ] ||
	cp -p "$index_orig" "$tmpdir/index"

# From now on commits go to the import branch.
git symbolic-ref HEAD "refs/heads/$branch_import"

# Import every requested package in turn: the names come either from the
# positional parameters or, with --stdin, from standard input, one per line.
if [ -z "$read_from_stdin" ]; then
	for REPLY in "$@"; do
		import "$REPLY"
	done
else
	while read -r REPLY; do
		import "$REPLY"
	done
fi

# Merge after import.
# Compare HEAD (still pointing at the import branch) with its value
# before the import to find out whether anything was committed.
cd "$saved_cwd"
head_new="$(git rev-parse --verify HEAD 2>/dev/null ||:)"
if [ "$head_orig" = "$head_new" ]; then
	verbose "Nothing imported."
elif [ -z "$import_only" ]; then
	if [ -z "$head_orig" ]; then
		# The original branch had no commits yet: make it point at
		# the import branch and check it out.
		git update-ref "$branch_orig" "refs/heads/$branch_import"
		message "Created ${branch_orig#refs/heads/} branch"
		git symbolic-ref HEAD "$branch_orig"
		git checkout -f
		# The checkout has been run, so cleanup_handler must not
		# overwrite the index with the backup copy.
		index_already_recovered=1
	elif [ -n "$head_new" ]; then
		# Switch back to the original branch and check it out.
		git symbolic-ref HEAD "$branch_orig"
		git checkout -f
		index_already_recovered=1
		# Merge the import branch, unless it is the original branch itself.
		if [ "$branch_orig" != "refs/heads/$branch_import" ]; then
			git pull ${quiet:+-n} . "refs/heads/$branch_import"
			message "Merged $branch_import branch into ${branch_orig#refs/heads/} branch"
		fi
	fi
fi
